# rm(list=ls())
# create class
setClass("waldCI",
slots = c(level = "numeric",
mean = "numeric",
sterr = "numeric",
lb = "numeric",
ub = "numeric"))PS4
Problem 1
Create a class to represent Wald-style normal approximation Confidence Intervals. Do this using S4.
- For the waldCI class, define the following:
- A constructor, which takes in a confidence level (0,1) and either a mean and standard error, or a lower and upper bound. This should be a custom constructor, not new() or waldCI().
- A validator.
For this section, I created a validator first so that I can use it in the constructor.
# create validator for use in constructor
setValidity("waldCI", function(object) {
# needed for part c
# negative standard error
if (object@sterr <= 0) {
stop(paste("@sterr = ", object@sterr, " cannot be negative"))
}
# lb > ub
if (object@lb >= object@ub) {
stop(paste("@lb = ", object@lb, " cannot be larger than @ub = ", object@ub))
}
# infinite bounds
if (!is.finite(object@lb) || !is.finite(object@ub)) {
stop(paste("Bounds must be finite"))
}
return(TRUE)
})Class "waldCI" [in ".GlobalEnv"]
Slots:
Name: level mean sterr lb ub
Class: numeric numeric numeric numeric numeric
# constructor
waldCI <- function(level, mean = NULL, sterr = NULL, lb = NULL, ub = NULL) {
# check level
if (missing(level) || level <= 0 || level >= 1) {
stop(paste("Confidence level must be a value between (0,1)"))
}
# fill in empty slots
alpha <- 1 - level
z <- qnorm(1 - alpha / 2)
if (!is.null(mean) && !is.null(sterr)) {
lb <- mean - z * sterr
ub <- mean + z * sterr
} else if (!is.null(lb) && !is.null(ub)) {
mean <- (ub + lb) / 2
sterr <- (ub - lb) / (2 * z)
} else {
stop("You must provide either (mean and se) or (lower and upper).")
}
object <- new("waldCI", level = level, mean = mean, sterr = sterr, lb = lb, ub = ub)
validObject(object)
return(object)
}- A show method.
setGeneric("show",
function(object, ...) {
standardGeneric("show")
})Creating a new generic function for 'show' in the global environment
[1] "show"
setMethod("show", "waldCI",
function(object, ...) {
cat("Wald Confidence Interval\n")
cat(" Confidence level: ", object@level, "\n")
cat(" Mean: ", object@mean, "\n")
cat(" Standard error: ", object@sterr, "\n")
cat(" CI: [", object@lb, ", ", object@ub, "]\n")
return(invisible(object))
}
)- Accessors: lb, ub, mean, sterr, level.
setGeneric("lb", function(object, ...) standardGeneric("lb"))[1] "lb"
setMethod("lb", "waldCI", function(object, ...) object@lb)
setGeneric("ub", function(object, ...) standardGeneric("ub"))[1] "ub"
setMethod("ub", "waldCI", function(object, ...) object@ub)
setGeneric("mean", function(object, ...) standardGeneric("mean"))Creating a new generic function for 'mean' in the global environment
[1] "mean"
setMethod("mean", "waldCI", function(object, ...) object@mean)
setGeneric("sterr", function(object, ...) standardGeneric("sterr"))[1] "sterr"
setMethod("sterr", "waldCI", function(object, ...) object@sterr)
setGeneric("level", function(object, ...) standardGeneric("level"))[1] "level"
setMethod("level", "waldCI", function(object, ...) object@level)- Setters: lb, ub, mean, sterr, level. Be sure to validate the resulting waldCI.
setGeneric("lb<-",
function(object, value, ...) {
standardGeneric("lb<-")
})[1] "lb<-"
setMethod("lb<-", "waldCI",
function(object, value, ...) {
object@lb <- value
validObject(object)
return(object)
}
)
setGeneric("ub<-",
function(object, value, ...) {
standardGeneric("ub<-")
})[1] "ub<-"
setMethod("ub<-", "waldCI",
function(object, value, ...) {
object@ub <- value
validObject(object)
return(object)
}
)
setGeneric("mean<-",
function(object, value, ...) {
standardGeneric("mean<-")
})[1] "mean<-"
setMethod("mean<-", "waldCI",
function(object, value, ...) {
object@mean <- value
validObject(object)
return(object)
}
)
setGeneric("sterr<-",
function(object, value, ...) {
standardGeneric("sterr<-")
})[1] "sterr<-"
setMethod("sterr<-", "waldCI",
function(object, value, ...) {
object@sterr <- value
validObject(object)
return(object)
}
)
setGeneric("level<-",
function(object, value, ...) {
standardGeneric("level<-")
})[1] "level<-"
setMethod("level<-", "waldCI",
function(object, value, ...) {
object@level <- value
validObject(object)
return(object)
}
)- A contains method, returning a logical of whether a value is within a CI.
setGeneric("contains", function(object, value, ...) standardGeneric("contains"))[1] "contains"
setMethod("contains", "waldCI",
function(object, value, ...) {
return(value >= object@lb && value <= object@ub)
})- An overlap method, that takes in two waldCI’s, and returns a logical of whether the two confidence intervals overlap.
setGeneric("overlap", function(ci1, ci2, ...) standardGeneric("overlap"))[1] "overlap"
setMethod("overlap", signature(ci1 = "waldCI", ci2 = "waldCI"),
function(ci1, ci2, ...) {
return(max(ci1@lb, ci2@lb) <= min(ci1@ub, ci2@ub))
})- as.numeric to return c(lb, ub). (Hint: The second argument of setGeneric is not needed when an existing s3 function uses the .Primitive function.)
setMethod("as.numeric", "waldCI", function(x, ...) {
c(x@lb, x@ub)
})- transformCI which takes in a function and a waldCI, and returns the transformed waldCI object. Warn the user that only monotonic functions make sense.
setGeneric("transformCI", function(object, foo, ...) standardGeneric("transformCI"))[1] "transformCI"
setMethod("transformCI", signature(object = "waldCI", foo = "function"),
function(object, foo, ...) {
warning("Warning: Only monotonic functions make sense for transformations")
new_lb <- foo(object@lb)
new_ub <- foo(object@ub)
waldCI(level = object@level,
lb = new_lb,
ub = new_ub)
})- Use your waldCI class to create three objects:
ci1: (17.2, 24.7), 95%
ci2: mean: 13, standard error: 2.5, 99%
ci3: (27.43, 39.22), 75%
ci1 <- waldCI(level = 0.95, lb = 17.2, ub = 24.7)
show(ci1)Wald Confidence Interval
Confidence level: 0.95
Mean: 20.95
Standard error: 1.9133
CI: [ 17.2 , 24.7 ]
ci2 <- waldCI(level = 0.99, mean = 13, sterr = 2.5)
show(ci2)Wald Confidence Interval
Confidence level: 0.99
Mean: 13
Standard error: 2.5
CI: [ 6.560427 , 19.43957 ]
ci3 <- waldCI(level = 0.75, lb = 27.43, ub = 39.22)
show(ci3)Wald Confidence Interval
Confidence level: 0.75
Mean: 33.325
Standard error: 5.12453
CI: [ 27.43 , 39.22 ]
Evaluate the following code:
ci1An object of class "waldCI"
Slot "level":
[1] 0.95
Slot "mean":
[1] 20.95
Slot "sterr":
[1] 1.9133
Slot "lb":
[1] 17.2
Slot "ub":
[1] 24.7
ci2An object of class "waldCI"
Slot "level":
[1] 0.99
Slot "mean":
[1] 13
Slot "sterr":
[1] 2.5
Slot "lb":
[1] 6.560427
Slot "ub":
[1] 19.43957
ci3An object of class "waldCI"
Slot "level":
[1] 0.75
Slot "mean":
[1] 33.325
Slot "sterr":
[1] 5.12453
Slot "lb":
[1] 27.43
Slot "ub":
[1] 39.22
as.numeric(ci1)[1] 17.2 24.7
as.numeric(ci2)[1] 6.560427 19.439573
as.numeric(ci3)[1] 27.43 39.22
lb(ci2)[1] 6.560427
ub(ci2)[1] 19.43957
mean(ci1)[1] 20.95
sterr(ci3)[1] 5.12453
level(ci2)[1] 0.99
lb(ci2) <- 10.5
mean(ci3) <- 34
level(ci3) <- .8
contains(ci1, 17)[1] FALSE
contains(ci3, 44)[1] FALSE
overlap(ci1, ci2)[1] TRUE
eci1 <- transformCI(ci1, sqrt)Warning in transformCI(ci1, sqrt): Warning: Only monotonic functions make sense
for transformations
eci1An object of class "waldCI"
Slot "level":
[1] 0.95
Slot "mean":
[1] 4.558599
Slot "sterr":
[1] 0.2098562
Slot "lb":
[1] 4.147288
Slot "ub":
[1] 4.969909
mean(transformCI(ci2, log))Warning in transformCI(ci2, log): Warning: Only monotonic functions make sense
for transformations
[1] 2.659343
- Show that your validator does not allow the creation of invalid confidence intervals:
negative standard error
lb > ub
Infinite bounds
invalid use of the setters
# negative sterr
try({waldCI(level = 0.95, mean = 10, sterr = -2)})Error in validityMethod(object) : @sterr = -2 cannot be negative
# lb > ub
try({waldCI(level = 0.95, lb = 10, ub = 5)})Error in validityMethod(object) :
@sterr = -1.27553364231164 cannot be negative
# infinite bounds
try({waldCI(level = 0.95, lb = -Inf, ub = Inf)})Error in validityMethod(object) : Bounds must be finite
# invalid use of setters
try({
ci <- waldCI(level = 0.95, mean = 10, sterr = 2)
lb(ci) <- 15 # This should trigger validation failure if ub < lb now
ub(ci) <- 5
validObject(ci)
})Error in validityMethod(object) :
@lb = 15 cannot be larger than @ub = 13.9199279690801
Problem 3
First, I load the ggplot2 library and the data. Currently, the “date” variable is a character variable, so I change the it to be of type Date.
# install.packages("plotly")
library(plotly)Warning: package 'plotly' was built under R version 4.4.3
Loading required package: ggplot2
Warning: package 'ggplot2' was built under R version 4.4.3
Attaching package: 'plotly'
The following object is masked from 'package:ggplot2':
last_plot
The following object is masked from 'package:stats':
filter
The following object is masked from 'package:graphics':
layout
library(dplyr)
Attaching package: 'dplyr'
The following object is masked _by_ '.GlobalEnv':
contains
The following objects are masked from 'package:stats':
filter, lag
The following objects are masked from 'package:base':
intersect, setdiff, setequal, union
library(lubridate)
Attaching package: 'lubridate'
The following objects are masked from 'package:base':
date, intersect, setdiff, union
us_states <- read.csv("data/us-states.csv")
us_states$date <- as.Date(us_states$date)- How many major and minor spikes in cases were there?
us_states_date <- us_states %>%
group_by(date) %>%
summarise(total_cases = sum(cases))
us_states_week <- us_states %>%
mutate(week = floor_date(date, "week")) %>%
group_by(week) %>%
summarise(cases_week_avg = sum(cases) / 7)
p <- plot_ly(us_states_date,
x = ~date, y = ~total_cases,
type = "scatter", mode = "markers",
marker = list(color = "slategray", opacity = 0.3, size = 4),
name = "Daily Cases") %>%
add_trace(
data = us_states_week,
x = ~week, y = ~cases_week_avg,
type = "scatter", mode = "lines",
line = list(width = 3),
name = "Weekly Average"
) %>%
layout(
title = "Major and Minor COVID-19 Case Spikes in the U.S.",
xaxis = list(title = "Date"),
yaxis = list(title = "Total Daily New Cases")
)
p A marker object has been specified, but markers is not in the mode
Adding markers to the mode...
# TODO: create color segments?- For the states with the highest and lowest overall rates per population, what differences do you see in their trajectories over time?
spectral <- grDevices::hcl.colors(length(unique(us_states$state)), "Spectral")
p <- plot_ly(us_states,
x = ~date, y = ~cases_avg_per_100k,
color = ~state, colors = spectral,
type = "scatter", mode = "lines",
line = list(width = 1.5), opacity = 0.5,
showlegend = FALSE,
text = ~paste(
"State:", state,
"<br>Date:", date,
"<br>Cases per 100k:", round(cases_avg_per_100k, 2)),
hoverinfo = "text") %>%
layout(
title = list(
text = "COVID-19 Case Rates by State"
),
xaxis = list(title = "Date"),
yaxis = list(title = "Average Cases Per 100k Residents")
)
p - Identify, to the best of your ability without a formal test, the first five states to experience Covid in a substantial way.
us_states_start <- us_states %>%
filter(date > as.Date("2020-03-20"),
date < as.Date("2020-04-20"),
cases_avg_per_100k > 15)
spectral <- grDevices::hcl.colors(length(unique(us_states_start$state)), "Spectral")
p <- plot_ly(us_states_start,
x = ~date, y = ~cases_avg_per_100k,
color = ~state, colors = spectral,
type = "scatter", mode = "lines",
line = list(width = 2),
text = ~paste(
"State:", state,
"<br>Date:", date,
"<br>Cases per 100k:", round(cases_avg_per_100k, 2)),
hoverinfo = "text") %>%
layout(
title = list(
text = "COVID-19 Case Rates by State (Mar–Apr 2020)"
),
xaxis = list(title = "Date"),
yaxis = list(title = "Average Cases Per 100k Residents"),
legend = list(title = list(text = "State"))
)
p Attribution